# -------------------------------------------------------------------
# Purpose: Creates Table 8
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast when either project directory is unavailable.
# `pdataanalysis` and `poutput` are expected to be defined by the caller
# before this script is sourced.
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutput)
)


# Load the county-level panel. The .RData file is expected to restore the
# object `countyLevel19001940`, which every later step reads from.
load(file = file.path(pdataanalysis, "countyLevel19001940.RData"))


# Build one estimation sample per outcome family by dropping rows in which
# the outcome(s) of that family are missing.
#   d1: occupational-diversity outcome observed
#   d2: USPTO technology-diversity outcome observed
#   d3: both segregation outcomes observed
#   d4: first principal component `sft_pc1` observed
d1 <- countyLevel19001940[!is.na(entropy_occ1950_adjp), ]
d2 <- countyLevel19001940[!is.na(entropy_uspto_techn), ]
d3 <- countyLevel19001940[
  !(is.na(segregation_namelast_mp_adjp) | is.na(segregation_coo_adjp)),
]
d4 <- countyLevel19001940[!is.na(sft_pc1), ]

# Winsorize and normalize variables
# Columns to transform: every column whose name matches the pattern below,
# except those already ending in "_ws" (so nothing is transformed twice).
vars <- names(countyLevel19001940) %>%
  str_subset("entropy|sum_n_namelast_mp_adjp|^segregation|sft_pc1|n_uspto_techn") %>%
  str_subset("_ws$", negate = TRUE)

# Winsorize with probs = c(.01, .99), then centre and scale to unit variance.
# scale() returns a one-column matrix carrying "scaled:center"/"scaled:scale"
# attributes; as.numeric() strips those so each new column is a plain numeric
# vector rather than a matrix column inside the data.table.
winsorize_and_standardize <- function(x) {
  as.numeric(scale(winsorize(x, probs = c(.01, .99))))
}

# Names of the derived columns, e.g. "sft_pc1" -> "sft_pc1_ws".
ws_cols <- paste0(vars, "_ws")

# The transformation is applied separately within each estimation sample, so
# the winsorization cut-offs and the mean/sd are sample-specific.
d1[, (ws_cols) := lapply(.SD, winsorize_and_standardize), .SDcols = vars]
d2[, (ws_cols) := lapply(.SD, winsorize_and_standardize), .SDcols = vars]
d3[, (ws_cols) := lapply(.SD, winsorize_and_standardize), .SDcols = vars]
d4[, (ws_cols) := lapply(.SD, winsorize_and_standardize), .SDcols = vars]


# Regressions
# Panel A: least-squares estimates. Ten models, two per outcome: the first
# uses surname diversity and population, the second adds ancestral-country
# diversity. All models absorb county fixed effects, state-by-year fixed
# effects and county-specific linear trends in `year`.
o <- list(
  # Outcome: entropy_occ1950_adjp_ws (sample d1)
  feols(
    entropy_occ1950_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d1
  ),
  feols(
    entropy_occ1950_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d1
  ),
  # Outcome: entropy_uspto_techn_ws (sample d2)
  feols(
    entropy_uspto_techn_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d2
  ),
  feols(
    entropy_uspto_techn_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d2
  ),
  # Outcome: segregation_namelast_mp_adjp_ws (sample d3)
  feols(
    segregation_namelast_mp_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  feols(
    segregation_namelast_mp_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  # Outcome: segregation_coo_adjp_ws (sample d3)
  feols(
    segregation_coo_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  feols(
    segregation_coo_adjp_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  # Outcome: sft_pc1_ws (sample d4)
  feols(
    sft_pc1_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d4
  ),
  feols(
    sft_pc1_ws ~
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d4
  )
)

# Panel B: reduced-form estimates. Same ten outcome/sample combinations as
# the least-squares panel, but each outcome is regressed directly on the
# `iv_lo_*` instruments instead of the observed regressors.
r <- list(
  # Outcome: entropy_occ1950_adjp_ws (sample d1)
  feols(
    entropy_occ1950_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d1
  ),
  feols(
    entropy_occ1950_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d1
  ),
  # Outcome: entropy_uspto_techn_ws (sample d2)
  feols(
    entropy_uspto_techn_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d2
  ),
  feols(
    entropy_uspto_techn_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d2
  ),
  # Outcome: segregation_namelast_mp_adjp_ws (sample d3)
  feols(
    segregation_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  feols(
    segregation_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  # Outcome: segregation_coo_adjp_ws (sample d3)
  feols(
    segregation_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  feols(
    segregation_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d3
  ),
  # Outcome: sft_pc1_ws (sample d4)
  feols(
    sft_pc1_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d4
  ),
  feols(
    sft_pc1_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws |
      gisjoin_1900 + statefip^year + gisjoin_1900[year],
    data = d4
  )
)

# Panel C: instrumental-variable estimates. fixest IV syntax is
#   outcome ~ exogenous | fixed effects | endogenous ~ instruments
# There are no exogenous regressors besides the absorbed effects (`~ 1`).
# Each endogenous regressor is paired with its `iv_lo_*` counterpart, so
# every model is exactly identified.
i <- list(
  # Outcome: entropy_occ1950_adjp_ws (sample d1)
  feols(
    entropy_occ1950_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
    data = d1
  ),
  feols(
    entropy_occ1950_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws,
    data = d1
  ),
  # Outcome: entropy_uspto_techn_ws (sample d2)
  feols(
    entropy_uspto_techn_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
    data = d2
  ),
  feols(
    entropy_uspto_techn_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws,
    data = d2
  ),
  # Outcome: segregation_namelast_mp_adjp_ws (sample d3)
  feols(
    segregation_namelast_mp_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
    data = d3
  ),
  feols(
    segregation_namelast_mp_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws,
    data = d3
  ),
  # Outcome: segregation_coo_adjp_ws (sample d3)
  feols(
    segregation_coo_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
    data = d3
  ),
  feols(
    segregation_coo_adjp_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws,
    data = d3
  ),
  # Outcome: sft_pc1_ws (sample d4)
  feols(
    sft_pc1_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
    data = d4
  ),
  feols(
    sft_pc1_ws ~ 1 |
      gisjoin_1900 + statefip^year + gisjoin_1900[year] |
      entropy_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_ws +
      entropy_coo_adjp_ws ~
      iv_lo_entropy_namelast_mp_adjp_fe_immig_ws +
      iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws +
      iv_lo_entropy_coo_adjp_fe_immig_ws,
    data = d4
  )
)


# F-statistics
# The first-stage F-statistics are taken from Stata's `ivreghdfe` (row "SWF"
# of e(first)), run once per IV model in the same order as the models in `i`.
# The Stata commands differ only in the outcome name and in the set of
# endogenous regressors / instruments, so they are generated by a helper
# instead of being written out by hand.

# Build one Stata snippet that
#   1. runs ivreghdfe for `depvar` with the given endogenous regressors and
#      instruments (same absorbed effects and clustering in every model),
#   2. stores round(e(first)["SWF", k]) in swf<k> for each endogenous
#      regressor k,
#   3. keeps only the swf* variables and the first observation.
# `endog` and `instr` are character vectors of Stata variable names of equal
# length. Returns a single string; continuation lines are indented by three
# spaces, matching the layout of the original hand-written commands.
build_swf_command <- function(depvar, endog, instr) {
  idx <- seq_along(endog)
  paste(
    c(
      sprintf(
        "ivreghdfe %s (%s= %s), absorb(gisjoin_1900_f year_f#statefip_f gisjoin_1900_f##c.year_num) cluster(statefip_f) first ffirst savefirst",
        depvar, paste(endog, collapse = " "), paste(instr, collapse = " ")
      ),
      sprintf("gen swf%d = .", idx),
      sprintf('replace swf%d = round(e(first)["SWF",%d])', idx, idx),
      "keep swf*",
      "keep if _n == 1"
    ),
    collapse = "\n   "
  )
}

# The two specifications estimated for every outcome: (x1, x3) instrumented
# by (z1, z3), then (x1, x2, x3) instrumented by (z1, z2, z3).
swf_commands_for <- function(depvar) {
  list(
    build_swf_command(depvar, c("x1", "x3"), c("z1", "z3")),
    build_swf_command(depvar, c("x1", "x2", "x3"), c("z1", "z2", "z3"))
  )
}

# Each `dstata` below renames the regressors to short Stata-friendly names:
#   x1/z1 = surname diversity and its instrument
#   x2/z2 = ancestral-country diversity and its instrument
#   x3/z3 = population and its instrument
# get_fstat_from_stata() is defined elsewhere; it is called with the command
# string and `data.in`, and its result is flattened to a character vector.

# Sample d1
dstata <- d1[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = entropy_coo_adjp_ws, z2 = iv_lo_entropy_coo_adjp_fe_immig_ws,
  x3 = sum_n_namelast_mp_adjp_ws, z3 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y = entropy_occ1950_adjp_ws,
  gisjoin_1900_f, statefip_f, year_f, year_num
)]
commands <- swf_commands_for("y")
swfstat <- as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata)))

# Sample d2
dstata <- d2[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = entropy_coo_adjp_ws, z2 = iv_lo_entropy_coo_adjp_fe_immig_ws,
  x3 = sum_n_namelast_mp_adjp_ws, z3 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y1 = entropy_uspto_techn_ws,
  gisjoin_1900_f, statefip_f, year_f, year_num
)]
commands <- swf_commands_for("y1")
swfstat <- c(swfstat, as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata))))

# Sample d3 (two outcomes, hence four commands: y1 first, then y2)
dstata <- d3[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = entropy_coo_adjp_ws, z2 = iv_lo_entropy_coo_adjp_fe_immig_ws,
  x3 = sum_n_namelast_mp_adjp_ws, z3 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y1 = segregation_namelast_mp_adjp_ws, y2 = segregation_coo_adjp_ws,
  gisjoin_1900_f, statefip_f, year_f, year_num
)]
commands <- c(swf_commands_for("y1"), swf_commands_for("y2"))
swfstat <- c(swfstat, as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata))))

# Sample d4
dstata <- d4[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = entropy_coo_adjp_ws, z2 = iv_lo_entropy_coo_adjp_fe_immig_ws,
  x3 = sum_n_namelast_mp_adjp_ws, z3 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y = sft_pc1_ws,
  gisjoin_1900_f, statefip_f, year_f, year_num
)]
commands <- swf_commands_for("y")
swfstat <- c(swfstat, as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata))))



# Create table
# Display labels used by etable() in place of the raw variable names.
table_labels <- c(
  # Regressors and their instruments
  entropy_namelast_mp_adjp_ws = "Surname diversity",
  iv_lo_entropy_namelast_mp_adjp_fe_immig_ws = "Predicted surname diversity",
  entropy_coo_adjp_ws = "Ancestral-country diversity",
  iv_lo_entropy_coo_adjp_fe_immig_ws = "Predicted ancestral-country diversity",
  sum_n_namelast_mp_adjp_ws = "Population",
  # NOTE(review): the leading space in the next label looks unintentional;
  # it is kept as-is so the generated table text does not change.
  iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws = " Predicted population",
  # Outcomes (two-line column headers via \makecell)
  segregation_namelast_mp_adjp_ws = "\\makecell{Residential segregation\\\\of surname groups}",
  segregation_coo_adjp_ws = "\\makecell{Residential segregation of\\\\ ancestral-country groups}",
  entropy_occ1950_adjp_ws = "\\makecell{Occupational\\\\diversity}",
  entropy_uspto_techn_ws = "\\makecell{USPTO tech \\\\codes diversity}",
  sft_pc1_ws = "\\makecell{Strength of\\\\family ties}",
  # Fixed-effect dimensions
  year = "Period",
  statefip = "State",
  gisjoin_1900 = "County"
)
setFixest_dict(table_labels)


# Panel A: write the least-squares models to the final table file. Standard
# errors are clustered by state; coefficients whose label matches
# "diversity" are listed first.
tablename <- file.path(poutput, "table08.tex")
etable(
  o,
  file = tablename, replace = TRUE, tex = TRUE,
  style.tex = style.tex("aer"),
  cluster = ~statefip,
  order = c("diversity"),
  fitstat = ~n,
  digits = "r3", digits.stats = "r3"
)

# Post-process the .tex file in place. The helpers below are defined
# elsewhere; they are called in this exact order because each one edits the
# file left by the previous one.
# Relabel the varying-slopes row produced by `gisjoin_1900[year]`.
edit_table_content_fixed(
  tablename,
  "Period $\\times $ County",
  "County-specific linear trends"
)
# Panel A heading, anchored on "\midrule".
add_table_row(
  tablename,
  "\\midrule",
  "\\multicolumn{2}{l}{\\textit{Panel A: Least-squares estimates}} &  \\multicolumn{9}{c}{}\\\\ \\cmidrule(lr){1-11}"
)
# Partial rules grouping the ten columns into five outcome pairs, anchored
# on the "\makecell" header row.
add_table_row(
  tablename,
  "\\makecell",
  "\\cmidrule(lr){2-3}  \\cmidrule(lr){4-5} \\cmidrule(lr){6-7}  \\cmidrule(lr){8-9}  \\cmidrule(lr){10-11}"
)
# Reposition the "Observations" row relative to "bottomrule".
move_table_row(tablename, "Observations", "bottomrule")
# Headings for Panels B and C; the estimate rows are filled in later.
add_table_row(
  tablename,
  "    \\\\",
  c(
    "\\multicolumn{2}{l}{\\textit{Panel B: Reduced-form estimates}} &  \\multicolumn{9}{c}{}\\\\",
    "\\\\",
    "\\multicolumn{2}{l}{\\textit{Panel C: Instrumental-variable estimates}} &  \\multicolumn{9}{c}{}\\\\"
  )
)

# Panel B: render the reduced-form models to a scratch .tex file, pull out
# its estimate rows with get_estimates_rows() (defined elsewhere), and add
# them to the final table at the "Panel B" heading.
temptable <- file.path(poutput, "temp.tex")
etable(
  r,
  file = temptable, replace = TRUE, tex = TRUE,
  style.tex = style.tex("aer"),
  cluster = ~statefip,
  order = c("diversity"),
  fitstat = ~n,
  digits = "r3", digits.stats = "r3"
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(
  tablename,
  "Panel B",
  c("\\cmidrule(lr){1-11}", estimates_rows)
)

# Panel C: same procedure for the IV models, with the first-stage
# Sanderson-Windmeijer F-statistics in `swfstat` appended as an extra line.
temptable <- file.path(poutput, "temp.tex")
etable(
  i,
  file = temptable, replace = TRUE, tex = TRUE,
  style.tex = style.tex("aer"),
  cluster = ~statefip,
  order = c("diversity"),
  extralines = list("Sanderson-Windmeijer \\textit{F}-stat" = swfstat),
  fitstat = ~n,
  digits = "r3", digits.stats = "r3"
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(
  tablename,
  "Panel C",
  c("\\cmidrule(lr){1-11}", estimates_rows)
)
# Add a "\\" line at the F-statistic row (position argument "before").
add_table_row(tablename, "Sanderson", "\\\\", "before")

# The scratch file is no longer needed once its rows have been copied.
file.remove(temptable)

cat("Table 8 saved to:", tablename, "\n")